package com.atguigu.day04;

import com.atguigu.bean.UserBehavior;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.util.HashSet;

/**
 * Flink job that prints a running count of unique visitors (UV).
 *
 * <p>Each line of the input CSV is split on commas and turned into a {@link UserBehavior}.
 * For every record whose behavior equals {@code "pv"}, the user id is forwarded under the
 * single constant key {@code "uv"}. The downstream keyed process function de-duplicates the
 * user ids and, for every incoming record, emits {@code ("uv", <distinct user ids so far>)},
 * which is then printed.
 */
public class Flink10_Project_UV {

    /** Location of the input file; one comma-separated record per line. */
    private static final String INPUT_PATH = "input/UserBehavior.csv";

    /** Behavior value that marks a record as relevant for the UV count. */
    private static final String PAGE_VIEW = "pv";

    /** Constant key under which every user id is grouped and every result is emitted. */
    private static final String UV_KEY = "uv";

    /**
     * Builds and runs the pipeline.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job fails, e.g. when a line has too few columns or a
     *                   numeric column cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        // 1. Obtain the stream execution environment; run everything in a single subtask.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Read the file and keep only the user ids of page-view records.
        env.readTextFile(INPUT_PATH)
                .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
                        // Split the line into its columns.
                        String[] columns = value.split(",");

                        // Build the JavaBean from the five columns. All numeric columns are
                        // parsed eagerly, so a malformed line fails here rather than being skipped.
                        UserBehavior userBehavior = new UserBehavior(
                                Long.parseLong(columns[0]),
                                Long.parseLong(columns[1]),
                                Integer.parseInt(columns[2]),
                                columns[3],
                                Long.parseLong(columns[4]));

                        // Only page views contribute to the unique-visitor count.
                        if (PAGE_VIEW.equals(userBehavior.getBehavior())) {
                            out.collect(Tuple2.of(UV_KEY, userBehavior.getUserId()));
                        }
                    }
                })
                // Typed key selector instead of the deprecated positional keyBy(0); the key is
                // now a plain String rather than a generic Tuple wrapper.
                .keyBy(record -> record.f0)
                .process(new KeyedProcessFunction<String, Tuple2<String, Long>, Tuple2<String, Integer>>() {
                    // Set used to de-duplicate user ids; its size is the UV count.
                    // NOTE(review): this is an ordinary field of the function instance, not Flink
                    // managed keyed state, so it is shared by every key this instance processes.
                    // That is only correct here because all records carry the same key "uv".
                    private final HashSet<Long> uids = new HashSet<>();

                    @Override
                    public void processElement(Tuple2<String, Long> value, Context ctx, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // Record the user id; duplicates are ignored by the set.
                        uids.add(value.f1);
                        // Emit the number of distinct user ids seen so far.
                        out.collect(Tuple2.of(UV_KEY, uids.size()));
                    }
                })
                .print();

        env.execute();
    }
}
